/*******************************************************************************

[Last updated: June 4th, 2024]

This script is the first one in the running sequence for the Endline data analysis

The script is organized as follows:

	Part A: Coding the Pre-Virtra Data
	Part B: Coding the Rubrics Data
	Part C: Coding the Post-Virtra Data
	
In each part, the script works with a specific number of survey items included
in the data, following the same repeated procedure:

	Step 1: Final data cleaning (including categorization and feature creation)
	Step 2: Creating index variables
	Step 3: Running several regressions on a set of outcomes in the data
	
All program estimations are stored temporarily in Stata memory, and are ready for
the next script

*******************************************************************************/

		
	
* (0) Program definitions ------------------------------------------------------
		 
	* Combined process
	*
	* reg_store <outcome>
	*
	* Runs every estimation of the standard battery for one outcome variable:
	*   - ITT with no covariates                        (tag N)
	*   - ITT with the hand-picked baseline covariates  (tag H)
	*   - LASSO on the consideration set                (tag L)
	*   - the same three specifications with a treatment x experience
	*     interaction                                   (tags hN, hH, hL)
	*
	* ITT, ITT_target, LASSO and LASSO_target are defined in another script.
	* Their second argument is passed as a bare name (no_covar, covar, ...),
	* presumably the name of a global holding the covariate list -- TODO confirm
	* against their definitions.
	*
	* Side effects: (re)defines the globals target_list, consideration_1 and
	* covar. Temporarily creates the variables t, e and te, which are always
	* removed again before the program returns, including when an estimation
	* fails.
	capture program drop reg_store
	program define reg_store
		args outcome

		* Run the no-covar ITT
		ITT `outcome' 	no_covar 			N

		* Run the hand-picked covar ITT
		ITT `outcome'	officer_baseline	H

		* Run the LASSO
		LASSO `outcome'	consideration		L

		* Run heterogeneous treatment effect analysis for experience ----------

		* Refuse to start if t, e or te already exist, so the cleanup below can
		* never delete a variable this program did not create.
		confirm new variable t e te

		gl target_list 	t e te
		gl consideration_1 ///
				dblack dhispanic dwhite dmale ///
				pre_arrest_con_cop pre_prs_score_w_arrs pre_days_iod
				// Consideration set minus experience

		* Everything that depends on the helper variables runs inside a
		* capture block: if any step fails we still reach the cleanup, then
		* re-raise the original error.
		capture noisily {

			gen t = treatment		// To prevent very long matrix name
			gen e = experience
			gen te = t * e

			gl covar 		t e te
			ITT_target `outcome' covar 				hN

			gl covar 		t e te $consideration_1
			ITT_target `outcome' covar 				hH

			gl covar 		t e te
			LASSO_target `outcome' consideration_1 	hL
		}
		local rc = _rc

		* Cleanup. The names are checked with -exact- first so that an
		* abbreviation match can never drop an unrelated variable.
		foreach helper in t e te {
			capture confirm variable `helper', exact
			if !_rc {
				drop `helper'
			}
		}

		* Propagate the failure (if any) to the caller
		if `rc' {
			exit `rc'
		}

	end
	
	/* This customized program allows us to perform OLS (with and without 
	baseline covariates) and LASSO regression in one run */
	
		 
		 
		 
		 
		 
* PART A: Pre Virtual training /////////////////////////////////////////////////


	* Clean the data -----------------------------------------------------------
	* Builds the officer-level analysis file for the pre-training assessment:
	* randomisation data + demographics + stratum treatment share, then the
	* assessment, the quantitatively coded responses and the admin data.
	
	use $rand, clear
	
	* Get demographic data
	* NOTE: keep(1 3) keeps officers that have no match in $demo. For them
	* gender and race are empty strings, so dmale/dwhite/dblack/dhispanic are 0
	* and dother is 1.
	gen id = employee_id
	merge 1:1 id using $demo, nogen keep(1 3) keepusing(age race gender pers_yoj)
	gen dmale 		= (gender == "M")
	gen dwhite 		= inlist(race, "WHI")
	gen dblack 		= inlist(race, "BLK")
	gen dhispanic 	= inlist(race, "WWH", "WBH", "S")
	gen dother 		= !inlist(race, "WWH", "WBH", "BLK", "WHI", "S")
	gen experience = pers_yoj

	* Treatment ratio
	* ratio = percentage of treated officers in the stratum, centred at 50.
	* The denominator (_N) counts every row of the stratum, including officers
	* whose treatment status is missing (they are only dropped further down).
	bys strata: egen ratio = total(treatment)
	bys strata: gen total_officers = _N
	replace ratio = 100 * (ratio / total_officers) - 50
	* Variable name written in full rather than relying on the abbreviation
	* "treat"
	gen ratio_treat = ratio * treatment
	
	* Save
	save $rand_cleaned, replace
	
	* For the assessment, drop officer with no treatment status
	drop if missing(treatment)
		
	* Merge with assesment data
	if inlist("${version}", "base", "online") {
		merge 1:1 survey_id using ${out_pre}_${version}, keep(3) nogen
	}
	if inlist("${version}", "full") {
		merge 1:m survey_id using ${out_pre}_${version}, keep(3) nogen
	}
	
	* In the "full" version the merge above may leave several rows per officer,
	* in which case a 1:1 merge on survey_id / employee_id aborts with r(459).
	* Use m:1 there; keep the stricter 1:1 (which also asserts uniqueness) for
	* every other version.
	local key_merge = cond("${version}" == "full", "m:1", "1:1")
	
	* Merge with quantitatively coded
	merge `key_merge' survey_id using ${pre_coded}, keep(1 3) nogen
	* The totals are taken from ${pre_coded_int} instead (next merge)
	drop total_E total_A total_NA
	
	merge `key_merge' survey_id using ${pre_coded_int}, ///
		keep(1 3) nogen ///
		keepusing(total_interpretations ///
					dummy_twocategories dummy_A dummy_E dummy_NA ///
					total_A total_E total_NA)
		
	merge `key_merge' survey_id using $pre_coded_act , keep(1 3) nogen
		
	
	* Merge with pretraining admin data
	merge `key_merge' employee_id using $pre_admin, keep(1 3) nogen
	
	* Race group
	gen Black = (picsgroup == "black")
	gen White = (picsgroup == "white")
	gen Pooled = 1
	
	
	* Componential z calculation -----------------------------------------------
	* get_z lists the question stems; for every stem the item q<stem>_score is
	* passed to z_cal (defined elsewhere). The code below then refers to the
	* results as z_q<stem>_score.
	global get_z ///
		3 4 5 6 8 ///
		17_1 17_2 17_3 ///
		15_1 15_2 15_3 ///
		13_1 13_2 13_3 13_4 13_5 ///
		20 71 75 77 80 ///
		51 109 ///
		53121 55123 56124 ///
		50 108 ///
		26 28 30 33 36

	foreach stem of global get_z {
		z_cal q`stem'_score
	}
	
	

	
	* Groups of variable -------------------------------------------------------
	* Each family below builds one or more indices as the row mean of z-scored
	* items and stores, in a global macro, the list of outcomes that will be
	* passed to reg_store.
	* z-scored items are referred to by their full name (z_q<stem>_score)
	* throughout, so the code does not depend on variable abbreviation.
		
	* Knowledge of concepts covered in training
	egen knowledge = rowmean(z_q3_score z_q4_score z_q5_score z_q6_score z_q8_score )
	gl knowledge_component ///
		knowledge ///
		q3_score q4_score q5_score q6_score q8_score
	
	* Strategies to Cope with Stress 
	egen stress = rowmean(z_q17_1_score z_q17_2_score z_q17_3_score)
	gl stress_component ///
		stress ///
		q17_1_score q17_2_score q17_3_score
			
	* Regulation of Emotions  
	egen emotion = rowmean(z_q15_1_score z_q15_2_score z_q15_3_score)
	gl emotion_component ///
		emotion ///
		q15_1_score q15_2_score q15_3_score

	* Confidence in Policing
	egen policing = rowmean(z_q13_1_score z_q13_2_score z_q13_3_score ///
							z_q13_4_score z_q13_5_score)
	gl confidence_component ///
		policing ///
		q13_1_score q13_2_score q13_3_score q13_4_score q13_5_score
	
	* Personalization 
	egen personalization  = rowmean(z_q20_score z_q71_score z_q75_score ///
									z_q77_score z_q80_score)
	gl personalization_component ///
		personalization ///
		q20_score q71_score q75_score q77_score q80_score
		
	* Knowledge of Use of Force Policy 
	egen assailants_correct = 	rowmean(z_q50_score z_q108_score)
	egen force_correct = 		rowmean(z_q51_score z_q109_score)
	egen appropriate_action2 = 	rowmean(z_q53121_score z_q55123_score z_q56124_score)
	
	* z-scores of the *_level items (only z_q50_level is used as an outcome
	* in this script)
	foreach q in 50 55 123 108 48 53_1 121 106_1 51 56 109 {
			z_cal q`q'_level
	}
	
	* NOTE: z_q53121 / z_q55123 / z_q56124 are deliberately left in their short
	* form here. reg_store receives these tokens verbatim, and the names of the
	* stored results may depend on them -- TODO confirm before renaming.
	gl knowledge_UOF_component ///
		appropriate_action2 ///
			z_q53121 z_q55123 z_q56124 ///
		assailants_correct ///
		force_correct ///
		z_q50_level
		
	* Processing Information and Forming Interpretations
	egen overall = 	rowmean(z_q26_score z_q28_score z_q30_score ///
							z_q33_score z_q36_score)
	
			* Z-score: Time spent deciding, both task
			foreach q in 26 28 30 33 36 {
			z_cal timing_`q'_log
			}
			egen t_overall = 	///
			rowmean(z_timing_26_log ///
					z_timing_28_log ///
					z_timing_30_log ///
					z_timing_33_log ///
					z_timing_36_log)
			
			* Z-score: Time spent processing (officer-timed task)
			foreach q in 25 27 29 {
			z_cal timing_`q'_log
			}
			egen proccessing_log_time = ///
			rowmean(z_timing_25_log z_timing_27_log z_timing_29_log)
			
			* Z-score: picture
			foreach pic in bike house car paint mart{
				z_cal `pic'featuressel
				z_cal `pic'featuresunsel
			}		
			* The wildcards pick up every z_<pic>featuressel / z_<pic>featuresunsel
			* variable present in the data
			egen zall_features_sel		= 	rowmean(z_*featuressel)
			egen zall_features_unsel	= 	rowmean(z_*featuresunsel)											
	
	* All together
	gl processing_forming ///
			zall_features_unsel ///
			zall_features_sel ///
			overall ///
			t_overall ///
			proccessing_log_time


	* Alternative Interpretations of a Subject's Actions
	gl interpretation ///
		total_interpretations ///
		dummy_twocategories ///
		dummy_A ///
		dummy_E ///
		dummy_NA ///
		total_A ///
		total_E ///
		total_NA
	
	
	* Use of Force in Dynamic Situations
	* z_appropriate / z_nonappropriate = row mean of the three z-scored
	* <type>_v1 ... <type>_v3 items
	foreach type in appropriate nonappropriate {
		foreach n in 1 2 3{
				z_cal `type'_v`n'
			}
			egen z_`type' = rowmean(z_`type'_v1 z_`type'_v2 z_`type'_v3)
		}
	
	gl UOF_in_dynamic ///
		appropriate_action2 ///
		z_appropriate ///
		z_nonappropriate
		
	* Run all regressions in this section --------------------------------------
	* Gather every outcome family defined above into a single list (order and
	* repeated entries are kept as they are), then run the full estimation
	* battery on each entry.

	local partA_outcomes ///
		$knowledge_component ///
		$stress_component ///
		$emotion_component ///
		$confidence_component ///
		$personalization_component ///
		$knowledge_UOF_component ///
		$processing_forming ///
		$interpretation ///
		$UOF_in_dynamic

	foreach outcome of local partA_outcomes {
		reg_store `outcome'
	}

	* Spill over analysis
	* spill_over_endline starts as one placeholder row of 15 missing values.
	* For every outcome, the three matrices stored by ITT_target under the SPL
	* tag (for t, R and rt) are joined side by side and appended as a new row.
	matrix spill_over_endline = J(1, 15, .)

	global spill_outcomes ///
		total_interpretations dummy_twocategories dummy_A dummy_E dummy_NA ///
		zall_features_unsel zall_features_sel overall t_overall proccessing_log_time ///
		appropriate_action2 z_appropriate z_nonappropriate 

	foreach outcome of global spill_outcomes {

		* Covariate list and target terms for this run
		global covar_spill_over 	$officer_baseline t R rt
		global target_list			t R rt

		* Short-named copies of the stratum ratio, its interaction with
		* treatment, and treatment itself
		generate R  = ratio
		generate rt = ratio_treat
		generate t  = treatment

		ITT_target `outcome' covar_spill_over SPL

		drop t R rt

		* Append this outcome's estimates as a new row
		matrix add = SPL_`outcome'_t, SPL_`outcome'_R, SPL_`outcome'_rt
		matrix spill_over_endline = spill_over_endline \ add
	}

	matrix list spill_over_endline
	
	
	
	
	
	
* PART B: Rubric data //////////////////////////////////////////////////////////

	* Clean the data
	* Starts again from the cleaned randomisation file saved earlier in this
	* script and attaches the rubric scores.
	use $rand_cleaned, clear
	
	* Merge in rubric data
	drop if missing(treatment)
	merge 1:m survey_id using ${out_rub}_${version}, keep(3) nogen
	
	* The rubric merge above is 1:m, so survey_id / employee_id may no longer be
	* unique in memory; a 1:1 merge would then abort with r(459). m:1 gives the
	* same result when the keys are still unique.
	merge m:1 survey_id using ${pre_out}_${version}, ///
		nogen keep(1 3) keepusing(knowledge_index regulation_index)

	* Merge with pretraining admin data
	merge m:1 employee_id using $pre_admin, keep(1 3) nogen	
	
	* Compute z_score
	gl get_z 	19 34 45 ///
				20 35 46 ///
				18 33 44 ///
				21 36 47 ///
				22 37 48 ///
				23 38 49
				
	foreach q in $get_z {
		z_cal q`q'_score
	}
	
	* Movement and Communication in Scenarios
	* Each index is the row mean of three z-scored rubric items, written with
	* their full names so the code does not depend on variable abbreviation.
	egen communicate = rowmean(z_q21_score z_q36_score z_q47_score)
	egen verbalize 	 = rowmean(z_q22_score z_q37_score z_q48_score)
	egen radio 		 = rowmean(z_q23_score z_q38_score z_q49_score)
	egen freeze 	 = rowmean(z_q18_score z_q33_score z_q44_score)
	egen moving 	 = rowmean(z_q19_score z_q34_score z_q45_score)
	egen cover  	 = rowmean(z_q20_score z_q35_score z_q46_score)
	egen moving_cover= rowmean(z_q19_score z_q34_score z_q45_score ///
							   z_q20_score z_q35_score z_q46_score) 

	* moving and cover are only analysed through the combined moving_cover index
	gl communication communicate verbalize radio freeze moving_cover

	
	* Shooting in the FOS - individual outcomes
	gl shooting_in_FOS q55_score q52_score q54_score q41_score
		
	* Run regression for these outcomes
	foreach outcome in $communication $shooting_in_FOS {
		reg_store `outcome'
	}
		
	* Shooting in the FOS, stacked outcomes
	* The four shooting items are written to one temporary file each, stacked
	* into a long dataset, and analysed jointly with a treatment x risk
	* interaction. $consideration_1 is set inside reg_store, so reg_store must
	* have been run at least once before this point.
	
		* First, save outcomes to separate data
		foreach v in q55_score q52_score q54_score q41_score {
			preserve
				* Keep only officers who have this item
				keep if !missing(`v')

				generate shooting_FOS = `v'

				* risk = 1 for q55_score and q52_score, 0 for the other two
				generate risk = inlist("`v'", "q55_score", "q52_score")

				* 1 when the item equals 1 on a risk item, or equals 0 on a
				* non-risk item; 0 otherwise
				generate shoot_appropriate = cond(risk, `v' == 1, `v' == 0)

				* The temporary file name is held in a local named after the item
				tempfile `v'
				save ``v''
			restore
		}
	
		* Then stack and run the analysis
		preserve
		clear
		
			append using `q55_score' `q52_score' `q54_score' `q41_score'
			generate treatment_risk = treatment * risk
			
			* Main specification: treatment, risk and their interaction
			local main_terms treatment risk treatment_risk
			
			global target_list `main_terms'
			
			global covar `main_terms'
			ITT_target shooting_FOS covar 				FN
			
			global covar `main_terms' $officer_baseline
			ITT_target shooting_FOS covar 				FH
			
			global covar `main_terms'
			LASSO_target shooting_FOS consideration 	FL
			
			
			* Heterogeneous treatment effects
			* Adds experience and all of its interactions with treatment and risk
			generate te  = treatment * experience
			generate re  = risk * experience
			generate tre = treatment * risk * experience
			
			local het_terms treatment risk experience treatment_risk te re tre
			
			global target_list `het_terms'
			
			global covar `het_terms'
			ITT_target shooting_FOS covar 				hN
			
			global covar `het_terms' $consideration_1
			ITT_target shooting_FOS covar 				hH
			
			global covar `het_terms'
			LASSO_target shooting_FOS consideration_1 	hL
		
		restore

	
	
	
	
	
	
* PART C: Post Virtual training ////////////////////////////////////////////////

	* Clean data ---------------------------------------------------------------
	* Officers with a treatment status, restricted to those present in the
	* post-training file, plus the pre-training admin data.
	use $rand_cleaned, clear
	drop if missing(treatment)

	merge 1:1 survey_id using $post_cleaned, nogen keep(3)
	
	* Merge with pretraining admin data
	merge 1:1 employee_id using $pre_admin, keep(1 3) nogen

	* Recall of Details in Scenarios
	local recall_items q5_pricorrect q5_seccorrect q5_off q3_correct
	foreach q of local recall_items {
		z_cal `q'
	}
	* Index uses the primary-detail and q3 items only
	egen item = rowmean(z_q5_pricorrect  z_q3_correct)
	
	* Post-scenario Questions
	
		* Scenario-specific copies of the clarity items. Errors from these
		* generate commands are deliberately suppressed by -capture-.
		forvalues s = 1/2 {
			capture generate q4_clarity_`s' = q4_clarity if scenario3 == "Street Stop `s'"
			capture generate q6_clarity_`s' = q6_clarity if scenario3 == "Street Stop `s'"
		}
		
		foreach q in q4_clarity q6_clarity {
			z_cal `q'
		}
		
		egen clarity = rowmean(z_q4_clarity z_q6_clarity)
	
	* Run regression
	local partC_outcomes ///
		item clarity ///
		q5_pricorrect q3_correct ///
		q4_clarity_1 q4_clarity_2 ///
		q6_clarity_1 q6_clarity_2

	foreach outcome of local partC_outcomes {
		reg_store `outcome'
	}
		
		
		